* Build the raw endline household file: join the three survey-section
* extracts on id, then write the result as both .dta and .csv.
* All paths are relative to the current working directory.
clear
clear matrix
set more off

* Elliott
**cd "Z:\home\elliott\Dropbox\1Spring2016\TUP\data"

** Merge files into TUP_endline.dta/csv
** Sections 8-17 are the master file.  Each merge writes its match indicator
** straight into its own variable (merge_e1, merge_e2), so no _merge variable
** is left behind to block the next merge.
use Endline\sections_8_17, clear
merge 1:1 id using Endline\sections_19_24, generate(merge_e1)
merge 1:1 id using Endline\sections_2_7, generate(merge_e2)

save Endline\TUP_endline, replace
outsheet using csv\TUP_endline.csv, comma replace

* Household roster merge
*
* Collapse the person-level roster to one row per household (id) holding
* household size, respondent age and head-counts by age/sex group.
* The result is saved to Endline\HHvars for merging into the endline file.

use Endline\household_roster, clear

* Stata orders missing above every number, so a bare "age>15" is TRUE when
* age is missing.  The adult indicators therefore also require a non-missing
* age; the "<=" and "<" tests already evaluate to 0 when age is missing.
gen ischild = age<=18
gen ischild2 = age<=15
gen issmallchild = age<5
* NOTE(review): sex==0 is used for both girls and men, and sex==1 for both
* boys and women, so one of the two pairs is mislabeled.  The coding of sex
* is not visible here; confirm it against the questionnaire.  Left unchanged.
gen girls    = (age<=15 & sex==0)
gen boys     = (age<=15 & sex==1)
gen men      = (age>15  & !missing(age) & sex==0)
gen women    = (age>15  & !missing(age) & sex==1)
* resp_age carries age only on rows with rel==1 (presumably the respondent).
gen resp_age = .
replace resp_age = age if rel==1

* hh_size is the largest value of ln in the household (presumably the roster
* line number -- confirm).
* Respondent age is taken with (max) rather than (sum): collapse returns 0 for
* a sum over all-missing values, which would report age 0 for a household
* with no rel==1 row, and a sum would add ages together if two rows carry
* rel==1.  With (max) the first case yields missing and the second one age.
collapse (max) hh_size=ln age=resp_age (sum) girls boys men women child_total=ischild children=ischild2 smallchildren=issmallchild, by(id)
sort id
save Endline\HHvars, replace

* Attach the collapsed roster variables (Endline\HHvars) to the merged endline
* file, hand-correct miskeyed id and cl_id values, drop duplicate records, and
* finally keep only households that matched the roster.
* NOTE(review): the merge on id runs BEFORE the id corrections below, so a
* record with a miskeyed id is matched to the roster on its uncorrected id.
* Confirm that HHvars carries the same (uncorrected) ids, otherwise these
* records are lost at the final "drop if merge_hhvars_e != 3".
use Endline\TUP_endline, clear
merge 1:1 id using Endline\HHvars

** Fix id mistakes:
** Each line is keyed on (cl_id, id).  Some corrections move a record onto an
** id that is held at that moment by a record with a different cl_id, which a
** following line then moves on (e.g. cl_id 475 and 476, cl_id 722 and 723).

    replace id=1372   if cl_id==4    & id==372
    replace id=1081   if cl_id==10   & id==108
    replace id=1107   if cl_id==127  & id==110
    replace id=1689   if cl_id==289  & id==689
    replace id=1410   if cl_id==371  & id==141
    replace id=1384   if cl_id==406  & id==134
    replace id=1497   if cl_id==527  & id==497
    * NOTE(review): 16320 -> 1652 does not follow the id/10 pattern of the neighbouring fixes (1632) -- confirm.
    replace id=1652   if cl_id==232  & id==16320
    replace cl_id=449 if cl_id==4490 & id==1665
    replace id=1525   if cl_id==475  & id==15250
    replace id=1527   if cl_id==476  & id==1525
    replace id=1049   if cl_id==125  & id==10490
    * NOTE(review): 12780 -> 1275 does not follow the id/10 pattern of the neighbouring fixes (1278) -- confirm.
    replace id=1275   if cl_id==129  & id==12780
    replace id=2240   if cl_id==722  & id==22400
    replace id=2241   if cl_id==723  & id==2240
    replace id=1094   if cl_id==86   & id==1049
    replace cl_id=582 if cl_id==5820 & id==1896
    ** Assumption: where a correction of this form would create a conflict, the id was set to id*10 on purpose
    ** by someone choosing which of two records to keep under the accurate id.  The disabled replaces below
    ** list those cases; the matching records are dropped further down instead of being re-numbered.
    ** Duplicate replace id=1223  if cl_id==74   & id==12230
    ** Duplicate replace cl_id=79 if cl_id==74   & id==1223
    ** Duplicate replace id=1938  if cl_id==294  & id==19380
    ** Duplicate replace id=1665  if cl_id==4490 & id==16650
    ** Duplicate replace id=1488  if cl_id==508  & id==14880
    ** Duplicate replace id=1093  if cl_id==84   & id==9093
    ** Duplicate replace id=1158  if cl_id==118  & id==11580
    ** Duplicate replace id=1106  if cl_id==120  & id==11060
    ** Duplicate replace id=1167  if cl_id==143  & id==11670
    ** Duplicate replace id=1247  if cl_id==146  & id==12470
    ** Duplicate replace id=1124  if cl_id==173  & id==11240
    ** Duplicate replace id=1973  if cl_id==341  & id==19730
    ** Duplicate replace id=2111  if cl_id==631  & id==21110
    ** Duplicate replace id=2246  if cl_id==732  & id==22460
    ** Duplicate replace id=2131  if cl_id==748  & id==21310
    ** Duplicate replace id=1896  if cl_id==5820 & id==18960
    * Drop three ids outright, then the records listed as duplicates above.
    drop if id==108
    drop if id==308
    drop if id==602
    drop if cl_id==74   & id==12230
    drop if cl_id==294  & id==19380
    drop if cl_id==4490 & id==16650
    drop if cl_id==508  & id==14880
    drop if cl_id==84   & id==9093
    drop if cl_id==118  & id==11580
    drop if cl_id==120  & id==11060
    drop if cl_id==143  & id==11670
    drop if cl_id==146  & id==12470
    drop if cl_id==173  & id==11240
    drop if cl_id==341  & id==19730
    drop if cl_id==631  & id==21110
    drop if cl_id==732  & id==22460
    drop if cl_id==748  & id==21310
    drop if cl_id==5820 & id==18960


* NEED TO FIGURE OUT WHICH RAW OBSERVATION IS CORRECT. PICKING THE ONE THAT LOOKS BEST FOR NOW.
drop if cl_id==190 & id>2000 
drop if cl_id==44 & id==12050 
    ** At some point, fix these merge failures.
    ** Note that there are no corresponding _m==2's for these _m==3's
    ** and there are *two* entries for some of these, suggesting they may be duplicates.
    * cl_id	id	respondent	husband
    * 352	1185	VICTORY MARIBA	MARTINA WANI
    * 417	1311	ROSE SITIMA	
    * 452	1676	ROSE ABANYA	JOHN MALISH
    * 734	2247	ROSE SUNDAY	ANDREW WANI
    * 190	170100		


* Keep only households present in both the endline file and the roster.
rename _merge merge_hhvars_e
drop if merge_hhvars_e != 3

*** Select which variables to keep.

    * Questionnaire section key:
    *   s2          Confidence in access
    *   s3_\d(a/b)  asset number/value
    *   s4(b)       Cash (food) Savings
    *   s5          Household Decision making
    *   s6          Transfers given/received
    *   s7          Confidence in ability
    *   s8 & s9     loans given/received
    *   s10         Emergency loan availability
    *   s11         Meals served
    *   s12         Food consumption
    *   s13         Food quantity/price
    *   s14         Food Outside household
    *   s15         Non-food consumption
    *   s16         Non-farm Income
    *   s17         Land ownership & cultivation
    *   s18         Farm & Livestock income
    *   s20         Food Security
    *
    * Sections s10, s13, s14 and s18 appear in the key but are not kept.
    * The s2* pattern matches every name beginning with s2, so it also keeps
    * the s23a_ and s23b_ variables that are renamed later in this file.

keep id cl_id cluster                            ///
     hh_size age girls boys men women            ///
     child_total children smallchildren          ///
     s2* s3* s4* s5* s6* s7* s8* s9*             ///
     s11* s12* s15* s16* s17* s20*

*** Re-label variables to match previous years.
    rename id idno

*** Assets & Savings
    * Asset items s3_<k>: suffix b becomes asset_val_<item>, suffix a becomes
    * asset_n_<item>.  The two lists below are parallel (k-th number goes with
    * k-th name).  Item 20 is not renamed.
    local asset_k    3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 21 22 23 24 25 26 27
    local asset_item cows smallanimals chickens ducks plough shed shop radio tv fan mobile chairtables bed bicycle carts sewing net charcoal pangas axes ploughs_again stoves potspans motorcycle
    local n_assets : word count `asset_k'
    forvalues pos = 1/`n_assets' {
        local k     : word `pos' of `asset_k'
        local asset : word `pos' of `asset_item'
        rename s3_`k'b asset_val_`asset'
        rename s3_`k'a asset_n_`asset'
    }
    * Items 1 and 2 only have their "a" variable renamed.
    rename s3_1a  asset_n_house
    rename s3_2a  asset_n_homestead

    * Land (s17_1..5) and cash savings (s4_1..5), again as parallel lists.
    local land_kind    owncult ownnocult ownrent rentcult communitycult
    local savings_kind home bank BRAC NGOs other
    forvalues pos = 1/5 {
        local land    : word `pos' of `land_kind'
        local savings : word `pos' of `savings_kind'
        rename s17_`pos' land_`land'
        rename s4_`pos'  savings_`savings'
    }

    * Stored-food savings.  For maize (item 1) and sorghum (item 2) the "a"
    * variable is renamed to the quantity (_q) and the "b" variable to the
    * value (_val).
    rename s4b_1a savings_maize_q
    rename s4b_2a savings_sorghum_q
    rename s4b_1b savings_maize_val
    rename s4b_2b savings_sorghum_val
    * "Other" stored foods: quantity sums the "a" variables and value sums the
    * "b" variables, mirroring maize/sorghum.  The quantity was previously
    * built from the "b" variables, which made it a copy of the value.
    * NOTE(review): assumes s4b_oth_2a exists alongside s4b_oth_1a -- confirm.
    * NOTE(review): "+" gives missing when either term is missing, so a
    * household reporting only one other food gets a missing total.
    gen savings_otherfood_q = s4b_oth_1a + s4b_oth_2a
    gen savings_otherfood_val = s4b_oth_1b + s4b_oth_2b
    ** Data Entry forgot to include code for kind of food stored
    * gen savings_cassava_q=.
    * gen savings_cassava_val=.
    * gen savings_beans_q =.
    * gen savings_beans_val =.
    * replace savings_cassava_q=s4b_oth_1a if s8bcoth==9
    * replace savings_cassava_val=s4b_2c if s8bcoth==9
    * replace savings_beans_q=s4b_1c if s8bcoth==6
    * replace savings_beans_val=s4b_2c if s8bcoth==6

    * Transfers: s6_<k>c becomes transfers_get<k>, s6_2_<k>c becomes
    * transfers_give<k>, for k = 1..3.
    forvalues k = 1/3 {
        rename s6_`k'c transfers_get`k'
    }
    forvalues k = 1/3 {
        rename s6_2_`k'c transfers_give`k'
    }

*** Access to services
    * s2_1..s2_5, in questionnaire order.
    local q = 0
    foreach service in healthcenter loans enrollchild foodtransfer coops {
        local ++q
        rename s2_`q' access_`service'
    }

*** Household Decision Making
    * s5_1..s5_9, in questionnaire order.
    local q = 0
    foreach topic in moneyown healthown majorpurchase dailypurchase familyvisits healthchild pressuretospend arguments fearful {
        local ++q
        rename s5_`q' decide_`topic'
    }


** Confidence & Autonomy
    * s7_1..s7_6, in questionnaire order.
    local q = 0
    foreach skill in business credit finances buybargain sellbargain save {
        local ++q
        rename s7_`q' conf_`skill'
    }

    * s23a_<k> becomes gone_<place> and s23b_<k> becomes cango_<place>, for
    * the same eight places.
    local q = 0
    foreach place in market school healthcenter houserelative housenonrelative NGO water church {
        local ++q
        rename s23a_`q' gone_`place'
        rename s23b_`q' cango_`place'
    }


*** ONE OBVIOUS if trivial inaccuracy:
* A recorded count of 10 TVs is recoded to 1.

replace asset_n_tv = 1 if asset_n_tv == 10

*** Consumption & Expenditure
    * Meal averages and food aggregates are computed first, while the s11 and
    * s12a variables still carry their questionnaire names and before the
    * s11 blanks are zero-filled further down.
    egen adult_meals = rowmean(s11b_*)
    egen child_meals = rowmean(s11a_*)
    egen all_meals = rowmean(s11*)
    egen c_cereals = rowtotal(s12a_1-s12a_7)
    label variable c_cereals "s12a_1-7: maize, potato, millet, etc."
    egen c_meat = rowtotal(s12a_12 s12a_13)

    * Food items s12a_1..s12a_23, in questionnaire order.
    local q = 0
    foreach food in maize sorghum millet potato sweetpotato rice bread beans oil salt sugar livestock poultry fish egg nuts milk vegetables fruit tea spices alcohol otherfood {
        local ++q
        rename s12a_`q' c_`food'
    }

    * Non-food items: s15a_<k> and s15b_<k> share the same item numbers
    * (1-10, 12, 13; item 11 is not renamed).  The three lists are parallel.
    local nonfood_k  1 2 3 4 5 6 7 8 9 10 12 13
    local s15a_item  fuel medicine airtime cosmetics soap transport entertainment childcare tobacco batteries church othermonth
    local s15b_item  clothesfootwear womensclothes childrensclothes shoes homeimprovement utensils furniture textiles ceremonies funerals dowry other
    local n_nonfood : word count `nonfood_k'
    forvalues pos = 1/`n_nonfood' {
        local k      : word `pos' of `nonfood_k'
        local a_name : word `pos' of `s15a_item'
        local b_name : word `pos' of `s15b_item'
        rename s15a_`k' c_`a_name'
        rename s15b_`k' c_`b_name'
    }

    ** Make Meals-served variable: served_3days
    * Blank (.) meal entries are set to zero in place, then summed per row.
    foreach mealvar of varlist s11* {
        replace `mealvar' = 0 if `mealvar' == .
    }
    egen served_3days = rowtotal(s11*)

* Income

    * Non-Farm Income
    * Up to five income sources; for source i the suffixes a..e of s16_<i>
    * map to the field names listed in the parallel list below.
    local s16_suffix a b c d e
    local s16_field  occupation HHmember monthsreceived income_monthly income_year
    forvalues source = 1/5 {
        forvalues pos = 1/5 {
            local sfx   : word `pos' of `s16_suffix'
            local field : word `pos' of `s16_field'
            rename s16_`source'`sfx' nonfarm_`field'`source'
        }
    }



* Food security

    * s20_1..s20_9, in questionnaire order.
    local q = 0
    foreach fsitem in worried notpreferred fewkinds preferrednot portions fewmeals nofood hungry wholeday {
        local ++q
        rename s20_`q' fs_`fsitem'
    }


****************************************
  * Clean-up of all expenditure (c_*) and asset-value (asset_val*) data:
  *   1. keep an untouched copy in raw_<name>
  *   2. set blanks (.) to zero
  *   3. top-code at the 99th percentile, computed after the zero-fill
****************************************

foreach v of varlist c_* asset_val* {
    generate raw_`v' = `v'
    replace `v' = 0 if `v' == .
    quietly summarize `v', detail
    replace `v' = r(p99) if (`v' > r(p99)) & (`v' != .)
}

** Appending _e tag to specify these are endline variables.
** The keys cl_id and idno keep their plain names and are skipped in the loop.
** Previously every variable was suffixed and the keys were restored with
** "rename cl_id cl_id" / "rename idno idno", which only worked because Stata
** resolved cl_id and idno as abbreviations of cl_id_e and idno_e; that breaks
** under "set varabbrev off" or if another variable shares the prefix.
foreach variable of varlist * {
    if !inlist("`variable'", "cl_id", "idno") {
        rename `variable' `variable'_e
    }
}

** Tag those few surveys whose food-consumption section has no zeros at all or
** is all zeros (enumerators were confused & more informative to just eliminate that data).
** cons_issue is 1 for tagged surveys and missing otherwise.
    * No zeros: the smallest of these seven items is still positive.
    gen cons_issue=1 if min(c_maize_e, c_rice_e, c_bread_e,  c_meat_e, c_poultry_e, c_fish_e, c_egg_e)>0
    * All zeros: the largest of these items is below 1.  Full _e names are
    * used; the un-suffixed names used before resolved only through
    * variable-name abbreviation.
    replace cons_issue=1 if max(c_cereals_e, c_beans_e, c_oil_e, c_salt_e, c_sugar_e, c_meat_e, c_fish_e, c_egg_e, c_milk_e, c_vegetables_e, c_fruit_e, c_spices_e, c_alcohol_e)<1
    disp "SURVEYS WITH BOTCHED CONSUMPTION SECTIONS:"
    tab cons_issue
    * Blank the food items for tagged surveys.  c_cereals_e and c_meat_e were
    * created by egen and so sit at the end of the dataset, outside the
    * c_maize_e-c_alcohol_e range; they are listed explicitly so aggregates
    * built from the discarded items are discarded too.
    * NOTE(review): c_otherfood_e follows c_alcohol_e and is not blanked, as
    * before -- confirm whether that is intended.
    foreach C of varlist c_maize_e-c_alcohol_e c_cereals_e c_meat_e {
        replace `C'=. if cons_issue==1
        }

* Attach the contents of Tgroups (presumably treatment-group assignment --
* confirm) by idno.  Rows found only in Tgroups are discarded; the match
* indicator is kept as merge_Tgroups_m.
sort idno
merge 1:1 idno using Tgroups, generate(merge_Tgroups_m)
drop if merge_Tgroups_m == 2
sort idno

* Overwrites the raw merged file saved earlier under the same name.
save Endline\TUP_endline, replace

